package com.niit.spark.rdd.test

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Date:2025/5/6
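 * Author: Ys
 * Description: Exercise that uses RDD.mapPartitions to extract the first two
 *              space-separated fields of each sample access-log line as a pair.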
 */
object MapPartitionsExercise {

  /**
   * Runs a local Spark job that parses three hard-coded access-log lines with
   * `mapPartitions` and prints the pair made of the first two space-separated
   * fields of each line (for the sample data: the user token and the date).
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("MapPartitionsExercise")
    val sc = new SparkContext(sparkConf)
    // Stop the context in `finally` so it is released even when the job throws.
    try {
      val logRDD = sc.parallelize(Seq(
        "User123 2025-04-27 10:00:00 GET /page1",
        "User456 2025-04-27 10:00:01 POST /page2",
        "User789 2025-04-27 10:00:02 GET /page3"))

      // `iter` iterates over the lines of one partition.
      val resRdd: RDD[(String, String)] = logRDD.mapPartitions { iter =>
        // e.g. "User123 2025-04-27 10:00:00 GET /page1" => ("User123", "2025-04-27")
        iter.map(_.split(" ")).collect {
          // Keep the first two fields; a line with fewer than two fields does not
          // match and is skipped instead of failing the task with an index error.
          case Array(user, date, _*) => (user, date)
        }
      }

      resRdd.collect().foreach(println)
    } finally {
      sc.stop()
    }
  }
}